# -*- coding: utf-8 -*-
"""
爬虫配置文件
"""

import os
from pathlib import Path

# Project root: the parent of the directory that contains this file.
# The path is resolved first so the result is absolute even when __file__
# happens to be a relative path (otherwise ".parent.parent" of a bare
# file name collapses to "." and points at the wrong directory).
BASE_DIR = Path(__file__).resolve().parent.parent

# Data storage directories, all located under <BASE_DIR>/data
DATA_DIR = BASE_DIR / "data"
NOVELS_DIR = DATA_DIR / "novels"
CHAPTERS_DIR = DATA_DIR / "chapters"
LOGS_DIR = DATA_DIR / "logs"

# Create the required directories when this module is imported.
# parents=True builds missing ancestors; exist_ok=True makes this a no-op
# when a directory is already present.
for dir_path in [DATA_DIR, NOVELS_DIR, CHAPTERS_DIR, LOGS_DIR]:
    dir_path.mkdir(parents=True, exist_ok=True)

# Crawler settings

# Default HTTP request headers, stored under CRAWLER_CONFIG["default_headers"].
# Long values are split with implicit string concatenation; the resulting
# strings are unchanged.
_DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}

CRAWLER_CONFIG = {
    # --- Request settings ---
    # NOTE(review): units for "timeout" and "retry_delay" are not stated
    # here; presumably seconds -- confirm against the code that reads them.
    "timeout": 30,
    "retry_times": 3,
    "retry_delay": 2,

    # --- Delay settings ---
    "request_delay": 1,  # interval between requests (seconds)
    "random_delay": True,  # whether to use a random delay

    # --- Concurrency settings ---
    "max_concurrent": 5,
    "semaphore_limit": 10,

    # --- Request header settings ---
    "default_headers": _DEFAULT_HEADERS,
}

# Database settings
#
# Credentials may be supplied through environment variables so that real
# secrets do not have to be committed to source control:
#   NOVEL_CRAWLER_MYSQL_USER      (fallback: "root")
#   NOVEL_CRAWLER_MYSQL_PASSWORD  (fallback: "password")
#   NOVEL_CRAWLER_REDIS_PASSWORD  (fallback: None, i.e. no password)
# When a variable is unset the original hard-coded value is used, so existing
# setups keep working unchanged. The variables are read once, at import time.
DATABASE_CONFIG = {
    "mysql": {
        "host": "localhost",
        "port": 3306,
        "user": os.environ.get("NOVEL_CRAWLER_MYSQL_USER", "root"),
        "password": os.environ.get("NOVEL_CRAWLER_MYSQL_PASSWORD", "password"),
        "database": "novel_crawler",
        "charset": "utf8mb4",
    },
    "mongodb": {
        "host": "localhost",
        "port": 27017,
        "database": "novel_crawler",
        # Maps a logical collection key to the collection name.
        "collections": {
            "novels": "novels",
            "chapters": "chapters",
            "authors": "authors",
        },
    },
    "redis": {
        "host": "localhost",
        "port": 6379,
        "db": 0,
        # "or None" keeps the original None default when the variable is
        # unset or set to an empty string.
        "password": os.environ.get("NOVEL_CRAWLER_REDIS_PASSWORD") or None,
    },
}

# Novel site settings
def _site_entry(name, base_url, search_url, novel_url_pattern,
                chapter_url_pattern, encoding="utf-8"):
    """Assemble the settings dict for a single novel site.

    The keys are inserted in a fixed order: name, base_url, search_url,
    novel_url_pattern, chapter_url_pattern, encoding.
    """
    return {
        "name": name,
        "base_url": base_url,
        "search_url": search_url,
        "novel_url_pattern": novel_url_pattern,
        "chapter_url_pattern": chapter_url_pattern,
        "encoding": encoding,
    }


# Keyed by a short site identifier. The *_url_pattern values are regular
# expression source strings (raw strings with escaped dots).
NOVEL_SITES = {
    "qidian": _site_entry(
        name="起点中文网",
        base_url="https://www.qidian.com",
        search_url="https://www.qidian.com/search/",
        novel_url_pattern=r"https://book\.qidian\.com/info/\d+",
        chapter_url_pattern=r"https://vipreader\.qidian\.com/chapter/\d+/\d+",
    ),
    "zongheng": _site_entry(
        name="纵横中文网",
        base_url="http://www.zongheng.com",
        search_url="http://search.zongheng.com/s/",
        novel_url_pattern=r"http://book\.zongheng\.com/book/\d+\.html",
        chapter_url_pattern=r"http://book\.zongheng\.com/chapter/\d+/\d+\.html",
    ),
    "17k": _site_entry(
        name="17K小说网",
        base_url="https://www.17k.com",
        search_url="https://search.17k.com/search.html",
        novel_url_pattern=r"https://www\.17k\.com/book/\d+\.html",
        chapter_url_pattern=r"https://www\.17k\.com/chapter/\d+/\d+\.html",
    ),
}

# Logging settings
# NOTE(review): the brace-style placeholders and the rotation / retention /
# compression keys look like loguru sink options -- confirm against the code
# that consumes this dict.
_LOG_LINE_FORMAT = (
    "{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} | {message}"
)

LOG_CONFIG = {
    "level": "INFO",
    "format": _LOG_LINE_FORMAT,
    # Log file path inside the logs directory.
    "file": LOGS_DIR.joinpath("crawler.log"),
    "rotation": "1 day",
    "retention": "30 days",
    "compression": "zip",
}

# Proxy settings
_PROXY_CREDENTIALS = {"username": "", "password": ""}

PROXY_CONFIG = {
    "enabled": False,
    # Empty by default. Example entries:
    #   "http://proxy1:port",
    #   "http://proxy2:port",
    "proxy_list": [],
    "proxy_auth": _PROXY_CREDENTIALS,
}

# Data cleaning settings

# Boolean switches for the individual cleaning steps.
_CLEANER_TOGGLES = {
    "remove_html_tags": True,
    "remove_extra_spaces": True,
    "remove_special_chars": False,
}

# Numeric length limits.
# NOTE(review): units (characters vs. bytes) are not stated here -- confirm
# against the code that applies them.
_CLEANER_LIMITS = {
    "max_title_length": 100,
    "max_summary_length": 500,
    "min_chapter_length": 100,
}

# Toggles first, then limits, so the key order matches the flat layout.
CLEANER_CONFIG = {**_CLEANER_TOGGLES, **_CLEANER_LIMITS}

# Scheduled task settings
_DAILY_CRAWL_TASK = {
    "name": "daily_crawl",
    "schedule": "0 2 * * *",  # cron expression: every day at 02:00
    "function": "crawl_new_novels",
}

SCHEDULE_CONFIG = {
    "enabled": False,
    "tasks": [_DAILY_CRAWL_TASK],
}